library(Rcpp)
cppFunction('
int cpp_gcd(int a, int b) {
while (b != 0) {
int temp = b;
b = a % b;
a = temp;
}
return abs(a);
}
')
cpp_gcd(20, 25)[1] 5
rational classFunction for GCD
library(Rcpp)
cppFunction('
int cpp_gcd(int a, int b) {
while (b != 0) {
int temp = b;
b = a % b;
a = temp;
}
return abs(a);
}
')
cpp_gcd(20, 25)[1] 5
Function for LCM
cppFunction('
int cpp_lcm(int a, int b) {
int x = a;
int y = b;
while (y != 0) {
int temp = y;
y = x % y;
x = temp;
}
return abs(a * b) / abs(x);
}
')
# Test the functions
cpp_lcm(20, 25) # Should return 100[1] 100
# Define the rational S4 class
setClass("rational",
slots = list(numerator = "numeric",
denominator = "numeric"))
# Constructor
rational <- function(numerator, denominator = 1) {
# if (denominator == 0) {
# stop("Denominator cannot be zero.")
# }
obj <- new("rational", numerator = numerator, denominator = denominator)
validObject(obj) # Check validity
return(obj)
}
# Validator
setValidity("rational", function(object) {
if (object@denominator == 0) {
return("Denominator cannot be zero.")
}
TRUE
})Class "rational" [in ".GlobalEnv"]
Slots:
Name: numerator denominator
Class: numeric numeric
# Show method
setMethod("show", "rational", function(object) {
cat(object@numerator, "/", object@denominator, "\n")
})
# Simplify method
setGeneric("simplify", function(object) standardGeneric("simplify"))[1] "simplify"
setMethod("simplify", "rational", function(object) {
a <- abs(object@numerator)
b <- abs(object@denominator)
gcd <- cpp_gcd(a, b)
# Simplify by devide gcd
new("rational", numerator = object@numerator / gcd, denominator = object@denominator / gcd)
})
# Quotient method
setGeneric("quotient", function(object, digit = 2) standardGeneric("quotient"))[1] "quotient"
setMethod("quotient", "rational", function(object, digit = 2) {
if (digit %% 1 != 0) {
stop("Digits must be a non-negative number.")
}
result <- object@numerator / object@denominator
format_string <- paste0("%.", digit, "f")
round_r <- sprintf(format_string,round(result, digit))
print(round_r)
return(result)
})
# Arithmetic methods
setMethod("+", signature(e1 = "rational", e2 = "rational"), function(e1, e2) {
lcm_denom <- cpp_lcm(e1@denominator, e2@denominator)
new_numerator <- e1@numerator * (lcm_denom / e1@denominator) + e2@numerator * (lcm_denom / e2@denominator)
simplify(new("rational", numerator = new_numerator, denominator = lcm_denom))
})
setMethod("-", signature(e1 = "rational", e2 = "rational"), function(e1, e2) {
lcm_denom <- cpp_lcm(e1@denominator, e2@denominator)
new_numerator <- e1@numerator * (lcm_denom / e1@denominator) - e2@numerator * (lcm_denom / e2@denominator)
simplify(new("rational", numerator = new_numerator, denominator = lcm_denom))
})
setMethod("*", signature(e1 = "rational", e2 = "rational"), function(e1, e2) {
simplify(new("rational", numerator = e1@numerator * e2@numerator, denominator = e1@denominator * e2@denominator))
})
setMethod("/", signature(e1 = "rational", e2 = "rational"), function(e1, e2) {
if (e2@numerator == 0) stop("Cannot divide by zero.")
simplify(new("rational", numerator = e1@numerator * e2@denominator, denominator = e1@denominator * e2@numerator))
})rational class to create three objects:# Create instances of Rational numbers
r1 <- rational(24, 6)
r2 <- rational(7, 230)
r3 <- rational(0, 4)
print(c(r1, r2, r3))[[1]]
24 / 6
[[2]]
7 / 230
[[3]]
0 / 4
r124 / 6
r30 / 4
r1 + r2927 / 230
r1 - r2913 / 230
r1 * r214 / 115
r1 / r2920 / 7
r1 + r34 / 1
r1 * r30 / 1
r2 / r3Error in r2/r3: Cannot divide by zero.
quotient(r1)[1] "4.00"
[1] 4
quotient(r2)[1] "0.03"
[1] 0.03043478
quotient(r2, digit = 3)[1] "0.030"
[1] 0.03043478
quotient(r2, digit = 3.14)Error in quotient(r2, digit = 3.14): Digits must be a non-negative number.
quotient(r2, digit = "avocado")Error in digit%%1: non-numeric argument to binary operator
q2 <- quotient(r2, digit = 3)[1] "0.030"
q2[1] 0.03043478
quotient(r3)[1] "0.00"
[1] 0
simplify(r1)4 / 1
simplify(r2)7 / 230
simplify(r3)0 / 1
r4 <- rational(24, 0)Error in validObject(.Object): invalid class "rational" object: Denominator cannot be zero.
Does the distribution of genre of sales across years appear to change?
# Import data
art_sales <- read.csv("/Users/nynn/Library/CloudStorage/OneDrive-Umich/Umich course/2024_Fall/Stats 506/Stats_506_PS/Stats_506_PS4/df_for_ml_improved_new_market.csv")
# Load necessary libraries
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
# Summarize data by year and genre, including NAs/no info as others
genre_summary <- art_sales %>%
group_by(year) %>%
summarise(
Photography = sum(Genre___Photography, na.rm = TRUE),
Print = sum(Genre___Print, na.rm = TRUE),
Sculpture = sum(Genre___Sculpture, na.rm = TRUE),
Painting = sum(Genre___Painting, na.rm = TRUE),
Other = sum(Genre___Photography == 0 & Genre___Print == 0 &
Genre___Sculpture == 0 & Genre___Painting == 0)
)
# Reshape data for Plotly
genre_long <- genre_summary %>%
pivot_longer(cols = c(Photography, Print, Painting, Sculpture, Other),
names_to = "Genre",
values_to = "Count") %>%
group_by(year) %>%
mutate(Percentage = round(Count / sum(Count) * 100, 1))
# Absolute number plot
absolute_plot <- plot_ly(genre_long, x = ~as.factor(year), y = ~Count, color = ~Genre, type = "bar") %>%
layout(
title = "Distribution of Genre Over Years - Absolute Number",
xaxis = list(title = "Year"),
yaxis = list(title = "Number of Sales"),
barmode = "stack",
legend = list(title = list(text = "Genre"))
)
absolute_plot# Percentage plot (stacked to 100%)
percentage_plot <- plot_ly(genre_long, x = ~as.factor(year), y = ~Percentage, color = ~Genre, type = "bar", text = ~Genre) %>%
layout(
title = "Distribution of Genre Over Years - Percentage",
xaxis = list(title = "Year"),
yaxis = list(title = "Percentage of Sales", ticksuffix = "%"),
barmode = "stack",
legend = list(title = list(text = "Genre"))
)
percentage_plotIs there a change in the sales price in USD over time?
How does the genre affect the change in sales price over time?
# Prepare data with median and IQR
df_yearly_price <- art_sales %>%
group_by(year) %>%
summarise(
median_price_usd = median(price_usd, na.rm = TRUE),
lower_q = quantile(price_usd, 0.25, na.rm = TRUE), # 25th percentile
upper_q = quantile(price_usd, 0.75, na.rm = TRUE) # 75th percentile
)
# Create the interactive Plotly plot
plot <- plot_ly(df_yearly_price, x = ~year) %>%
# Add ribbon for IQR
add_ribbons(ymin = ~lower_q, ymax = ~upper_q, name = "IQR (25th - 75th Percentile)",
fillcolor = 'rgba(173, 216, 230, 0.4)', line = list(color = 'transparent')) %>%
# Add median line
add_lines(y = ~median_price_usd, name = "Median Price", line = list(color = 'blue', width = 2)) %>%
# Add median points
add_markers(y = ~median_price_usd, name = "Median Price", marker = list(color = 'blue', size = 6)) %>%
# Layout for titles and styling
layout(
title = "Sales Price Distribution Over Time with IQR",
xaxis = list(title = "Year"),
yaxis = list(title = "Sales Price (USD)"),
legend = list(title = list(text = "Legend")),
hovermode = "x unified" # Display hover info for all elements together
)
plot# Create a dataset for genre
art_sales_genre <- art_sales %>%
mutate(Genre = case_when(
Genre___Photography == 1 ~ "Photography",
Genre___Print == 1 ~ "Print",
Genre___Sculpture == 1 ~ "Sculpture",
Genre___Painting == 1 ~ "Painting",
TRUE ~ "Other"
)) %>%
select(year, price_usd, Genre)
art_sales_genre$Genre <- factor(art_sales_genre$Genre, levels = c("Painting", "Photography", "Print", "Sculpture", "Other"))
# Calculate IQR (Interquartile Range) for each genre and year
genre_price_summary <- art_sales_genre %>%
group_by(year, Genre) %>%
summarise(
median_price = median(price_usd, na.rm = TRUE),
lower_q = quantile(price_usd, 0.25, na.rm = TRUE), # 25th percentile
upper_q = quantile(price_usd, 0.75, na.rm = TRUE) # 75th percentile
)`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
# Generate a list of plotly plots for each genre with IQR ribbons and median lines
plots <- genre_price_summary %>%
group_by(Genre) %>%
do(plot = plot_ly(data = ., x = ~year) %>%
add_ribbons(
ymin = ~lower_q,
ymax = ~upper_q,
fillcolor = ~Genre,
name = "IQR",
showlegend = FALSE,
opacity = 0.5
) %>%
add_lines(
y = ~median_price,
color = ~Genre,
name = ~Genre,
line = list(width = 3),
hoverinfo = "text",
text = ~paste("Year:", year,
"<br>Median Price:", round(median_price, 2),
"<br>Genre:", Genre)
) %>%
layout(
title = list(text = ~unique(Genre), xref = "paper"),
xaxis = list(title = "Year"),
yaxis = list(title = "Median Sales Price (USD)")
))
# Use subplot to create a faceted plot with each genre in a separate facet, similar to ggplot's facet_wrap
final_plot <- subplot(plots$plot, nrows = 2, shareX = TRUE, titleX = TRUE)
# Display the final interactive plot with a shared legend at the top
final_plot %>%
layout(
title = "Sales Price Over Time by Genre with IQR",
legend = list(orientation = "h", x = 0.5, xanchor = "center"),
margin = list(t = 50)
)# Load necessary libraries
library(data.table)
Attaching package: 'data.table'
The following objects are masked from 'package:lubridate':
hour, isoweek, mday, minute, month, quarter, second, wday, week,
yday, year
The following objects are masked from 'package:dplyr':
between, first, last
The following object is masked from 'package:purrr':
transpose
library(nycflights13)
# Convert to data.table format
flights_dt <- as.data.table(flights)
airports_dt <- as.data.table(airports)
# Departure delays
departure_delays <- flights_dt[
, .(mean_delay = mean(dep_delay, na.rm = TRUE),
med_delay = median(dep_delay, na.rm = TRUE),
numflights = .N), by = origin
][numflights >= 10][order(-mean_delay)]
# Join with airports data to get airport names
departure_delays <- departure_delays[
airports_dt, on = .(origin = faa), nomatch = 0
][, .(name, mean_delay, med_delay)][order(-mean_delay)]
# Display the result for departure delays
print(departure_delays) name mean_delay med_delay
<char> <num> <num>
1: Newark Liberty Intl 15.10795 -1
2: John F Kennedy Intl 12.11216 -1
3: La Guardia 10.34688 -3
# Arrival delays
arrival_delays <- flights_dt[
, .(mean_delay = mean(arr_delay, na.rm = TRUE),
med_delay = median(arr_delay, na.rm = TRUE),
numflights = .N), by = dest
][numflights >= 10][order(-mean_delay)]
# Join with airports data to get airport names
arrival_delays <- arrival_delays[
airports_dt, on = .(dest = faa), nomatch = 0
][, name := coalesce(name, dest)][, .(name, mean_delay, med_delay)][order(-mean_delay)]
# Display all rows for arrival delays
print(arrival_delays) name mean_delay med_delay
<char> <num> <num>
1: Columbia Metropolitan 41.76415094 28.0
2: Tulsa Intl 33.65986395 14.0
3: Will Rogers World 30.61904762 16.0
4: Jackson Hole Airport 28.09523810 15.0
5: Mc Ghee Tyson 24.06920415 2.0
6: Dane Co Rgnl Truax Fld 20.19604317 1.0
7: Richmond Intl 20.11125320 1.0
8: Akron Canton Regional Airport 19.69833729 3.0
9: Des Moines Intl 19.00573614 0.0
10: Gerald R Ford Intl 18.18956044 1.0
11: Birmingham Intl 16.87732342 -2.0
12: Theodore Francis Green State 16.23463687 1.0
13: Greenville-Spartanburg International 15.93544304 -0.5
14: Cincinnati Northern Kentucky Intl 15.36456376 -3.0
15: Savannah Hilton Head Intl 15.12950601 -1.0
16: Manchester Regional Airport 14.78755365 -3.0
17: Eppley Afld 14.69889841 -2.0
18: Yeager 14.67164179 -1.5
19: Kansas City Intl 14.51405836 0.0
20: Albany Intl 14.39712919 -4.0
21: General Mitchell Intl 14.16722038 0.0
22: Piedmont Triad 14.11260054 -2.0
23: Washington Dulles Intl 13.86420212 -3.0
24: Cherry Capital Airport 12.96842105 -10.0
25: James M Cox Dayton Intl 12.68048606 -3.0
26: Louisville International Airport 12.66938406 -2.0
27: Chicago Midway Intl 12.36422360 -1.0
28: Sacramento Intl 12.10992908 4.0
29: Jacksonville Intl 11.84483416 -2.0
30: Nashville Intl 11.81245891 -2.0
31: Portland Intl Jetport 11.66040210 -4.0
32: Greater Rochester Intl 11.56064461 -5.0
33: Hartsfield Jackson Atlanta Intl 11.30011285 -1.0
34: Lambert St Louis Intl 11.07846451 -3.0
35: Norfolk Intl 10.94909344 -4.0
36: Baltimore Washington Intl 10.72673385 -5.0
37: Memphis Intl 10.64531435 -2.5
38: Port Columbus Intl 10.60132291 -3.0
39: Charleston Afb Intl 10.59296847 -4.0
40: Philadelphia Intl 10.12719014 -3.0
41: Raleigh Durham Intl 10.05238095 -3.0
42: Indianapolis Intl 9.94043412 -3.0
43: Charlottesville-Albemarle 9.50000000 -5.0
44: Cleveland Hopkins Intl 9.18161129 -5.0
45: Ronald Reagan Washington Natl 9.06695204 -2.0
46: Burlington Intl 8.95099602 -4.0
47: Buffalo Niagara Intl 8.94595186 -5.0
48: Syracuse Hancock Intl 8.90392501 -5.0
49: Denver Intl 8.60650021 -2.0
50: Palm Beach Intl 8.56297210 -3.0
51: Bob Hope 8.17567568 -3.0
52: Fort Lauderdale Hollywood Intl 8.08212154 -3.0
53: Bangor Intl 8.02793296 -9.0
54: Asheville Regional Airport 8.00383142 -1.0
55: Pittsburgh Intl 7.68099053 -5.0
56: Gallatin Field 7.60000000 -2.0
57: NW Arkansas Regional 7.46572581 -2.0
58: Tampa Intl 7.40852503 -4.0
59: Charlotte Douglas Intl 7.36031885 -3.0
60: Minneapolis St Paul Intl 7.27016886 -5.0
61: William P Hobby 7.17618819 -4.0
62: Bradley Intl 7.04854369 -10.0
63: San Antonio Intl 6.94537178 -9.0
64: South Bend Rgnl 6.50000000 -3.5
65: Louis Armstrong New Orleans Intl 6.49017497 -6.0
66: Key West Intl 6.35294118 7.0
67: Eagle Co Rgnl 6.30434783 -4.0
68: Austin Bergstrom Intl 6.01990875 -5.0
69: Chicago Ohare Intl 5.87661475 -8.0
70: Orlando Intl 5.45464309 -5.0
71: Detroit Metro Wayne Co 5.42996346 -7.0
72: Portland Intl 5.14157973 -5.0
73: Nantucket Mem 4.85227273 -3.0
74: Wilmington Intl 4.63551402 -7.0
75: Myrtle Beach Intl 4.60344828 -13.0
76: Albuquerque International Sunport 4.38188976 -5.5
77: George Bush Intercontinental 4.24079040 -5.0
78: Norman Y Mineta San Jose Intl 3.44817073 -7.0
79: Southwest Florida Intl 3.23814963 -5.0
80: San Diego Intl 3.13916574 -5.0
81: Sarasota Bradenton Intl 3.08243131 -5.0
82: Metropolitan Oakland Intl 3.07766990 -9.0
83: General Edward Lawrence Logan Intl 2.91439222 -9.0
84: San Francisco Intl 2.67289152 -8.0
85: Yampa Valley 2.14285714 2.0
86: Phoenix Sky Harbor Intl 2.09704733 -6.0
87: Montrose Regional Airport 1.78571429 -10.5
88: Los Angeles Intl 0.54711094 -7.0
89: Dallas Fort Worth Intl 0.32212685 -9.0
90: Miami Intl 0.29905978 -9.0
91: Mc Carran Intl 0.25772849 -8.0
92: Salt Lake City Intl 0.17625459 -8.0
93: Long Beach -0.06202723 -10.0
94: Martha\\\\'s Vineyard -0.28571429 -11.0
95: Seattle Tacoma Intl -1.09909910 -11.0
96: Honolulu Intl -1.36519258 -7.0
97: John Wayne Arpt Orange Co -7.86822660 -11.0
98: Palm Springs Intl -12.72222222 -13.5
name mean_delay med_delay
planes_dt <- as.data.table(planes)
fastest_aircraft <- flights_dt[
planes_dt,
on = "tailnum",
.(model, mph = distance / (air_time / 60)), # Calculate speed in join
nomatch = NULL
][
, .(avgmph = mean(mph, na.rm = TRUE), nflights = .N), by = model # Aggregate by model
][order(-avgmph)][1] # Sort by avgmph and take top row
print(fastest_aircraft) model avgmph nflights
<char> <num> <int>
1: 777-222 482.6254 4